ratio_total_ARPA_inc = ARPA_inc_gub / ARPA_inc_sen,
total_CARES_ARPA_inc_state_gub = CARES_inc_state_gub + ARPA_inc_state_gub,
total_CARES_ARPA_inc_state_sen = CARES_inc_state_sen + ARPA_inc_state_sen,
ratio_total_CARES_ARPA_inc_state = total_CARES_ARPA_inc_state_gub / total_CARES_ARPA_inc_state_sen,
ratio_total_CARES_inc_state = CARES_inc_state_gub / CARES_inc_state_sen,
ratio_total_ARPA_inc_state = ARPA_inc_state_gub / ARPA_inc_state_sen,
total_CARES_ARPA_gub = CARES_gub + ARPA_gub,
total_CARES_ARPA_sen = CARES_sen + ARPA_sen,
ratio_total_CARES_ARPA = total_CARES_ARPA_gub / total_CARES_ARPA_sen,
ratio_total_CARES = CARES_gub / CARES_sen,
ratio_total_ARPA = ARPA_gub / ARPA_sen
)
# Add senate-specific and senate class non-specific lagged variables for each ratio
# Lagged variables capture the value of a variable from the previous time period
df_main5 <- df_main4 %>%
filter(year <= 2022 & year >= 2020) %>%
arrange(state, office, year) %>%  # Arrange the data by state, office, and year
group_by(state, office) %>%  # Group the data by state and office for calculating lagged variables
mutate(
lagged_ratio_total_CARES_ARPA_inc = lag(ratio_total_CARES_ARPA_inc),
lagged_ratio_total_CARES_inc = lag(ratio_total_CARES_inc),
lagged_ratio_total_ARPA_inc = lag(ratio_total_ARPA_inc),
lagged_ratio_total_CARES_ARPA_inc_state = lag(ratio_total_CARES_ARPA_inc_state),
lagged_ratio_total_CARES_inc_state = lag(ratio_total_CARES_inc_state),
lagged_ratio_total_ARPA_inc_state = lag(ratio_total_ARPA_inc_state),
lagged_ratio_total_CARES_ARPA = lag(ratio_total_CARES_ARPA),
lagged_ratio_total_CARES = lag(ratio_total_CARES),
lagged_ratio_total_ARPA = lag(ratio_total_ARPA)
) %>%
ungroup() %>%  # Remove grouping to avoid affecting subsequent operations
arrange(state, office, senate_class, year) %>%  # Arrange data by state, office, senate class, and year
group_by(state, office, senate_class) %>%  # Group by state, office, and senate class for further lagged variables
mutate(
lagged_ratio_total_CARES_ARPA_inc_class_spec = lag(ratio_total_CARES_ARPA_inc),
lagged_ratio_total_CARES_inc_class_spec = lag(ratio_total_CARES_inc),
lagged_ratio_total_ARPA_inc_class_spec = lag(ratio_total_ARPA_inc),
lagged_ratio_total_CARES_ARPA_inc_state_class_spec = lag(ratio_total_CARES_ARPA_inc_state),
lagged_ratio_total_CARES_inc_state_class_spec = lag(ratio_total_CARES_inc_state),
lagged_ratio_total_ARPA_inc_state_class_spec = lag(ratio_total_ARPA_inc_state),
lagged_ratio_total_CARES_ARPA_class_spec = lag(ratio_total_CARES_ARPA),
lagged_ratio_total_CARES_class_spec = lag(ratio_total_CARES),
lagged_ratio_total_ARPA_class_spec = lag(ratio_total_ARPA)
) %>%
ungroup()  # Remove grouping to finalize the data
# 2SLS ----------------------------------------------------------------------- #
# Define a function to run the IV (Instrumental Variable) model with different configurations
# The function takes two arguments: the outcome variable and whether to include a control variable.
run_iv_model <- function(outcome_var, include_control = TRUE) {
# Create the formula for the IV model based on whether the control variable is included.
# If 'include_control' is TRUE, the control variable 'norm_vote_top2_share_sd' is included in the formula.
if (include_control) {
formula <- as.formula(paste0(outcome_var, " ~ total_1k_muni_aid_per_resident + norm_vote_top2_share_sd |
reps_per_million + norm_vote_top2_share_sd"))
} else {
# If 'include_control' is FALSE, the control variable is omitted from the formula.
formula <- as.formula(paste0(outcome_var, " ~ total_1k_muni_aid_per_resident |
reps_per_million"))
}
# Run the IV model using the 'ivreg' function from the AER package.
# The formula and data are passed to the function.
iv_model <- ivreg(formula, data = df_main5)
# Calculate clustered standard errors at the state level.
clustered_se <- coeftest(iv_model, vcov = vcovCL(iv_model, cluster = ~ state))
# Return the IV model object.
return(iv_model)
}
# Define the outcome variables that will be tested in the IV models.
# These are the ratios calculated earlier: overall CARES + ARPA, CARES, and ARPA.
outcome_vars <- c("ratio_total_CARES_ARPA_inc", "ratio_total_CARES_inc", "ratio_total_ARPA_inc")
# Initialize an empty list to store the results of the IV models.
models <- list()
# Loop through each outcome variable and run the IV model twice:
# Once with the control variable and once without.
for (outcome in outcome_vars) {
cat("\nRunning IV model for outcome:", outcome, "with control:\n")
# Run the IV model with the control variable included and store the result in the models list.
models[[paste0(outcome, "_with_control")]] <- run_iv_model(outcome_var = outcome, include_control = TRUE)
cat("\nRunning IV model for outcome:", outcome, "without control:\n")
# Run the IV model without the control variable and store the result in the models list.
models[[paste0(outcome, "_without_control")]] <- run_iv_model(outcome_var = outcome, include_control = FALSE)
}
library(ivreg)
rm(list = ls())
install.packages(ivreg)
install.packages("ivreg")
rm(list = ls())
master_directory = "C:/Users/Shantanu.Kamat/Dropbox/Elections/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(lmtest)
library(sandwich)
library(AER)
library(tidystringdist)
library(stargazer)
library(rio)
library(ropensecretsapi)
library(pdftools)
# Read the media coverage data from an Excel file located in the data_folder directory
media_coverage_states <- read_xlsx(paste0(data_folder, "Raw/media coverage by state.xlsx")) %>%
# Rename the columns for easier reference in the analysis
rename(
year = year,
state = state,
office = office,
incumbent = Incumbent,
inc_search = `Incumbent Search Term`, # Search term used for finding incumbent-related media
CARES = `CARES Act`,
CARES_inc_state = `CARES and Incumbent State`,
CARES_inc = `CARES and Incumbent`,
ARPA = `American Resuce Plan`,
ARPA_inc_state = `American Rescue and Incumbent State`,
ARPA_inc = `American Rescue and Incumbent`
)
# Read a Stata (.dta) file containing state election panel data
df_main1 <- read_dta(paste0(data_folder, "Processing/state_elections_panel_df5_20240731.dta"))
# Select relevant columns from the dataset for further analysis
# The select() function is used to keep only the necessary columns for this analysis
df_main2 <- df_main1 %>%
select(
year,
gub_year,  # Ensure gub_year is included for later merging
state,
office,
senate_class,                # The class of the senate seat (important for staggered elections)
norm_vote_top2_share_sd,     # Normal vote control variable
total_1k_muni_aid_per_resident,  # Total aid per resident (in thousands of dollars)
reps_per_million,             # Number of representatives per million residents (used as an instrumental variable)
pop
)
# Merge the selected election data with the media coverage data
# left_join() combines the two datasets based on shared columns (year, state, office, and senate_class)
df_main3 <- df_main2 %>%
left_join(media_coverage_states, by = c("year", "state", "office", "senate_class"))
# Filter out gubernatorial races, ensure gub_year is set, and prepare for merging
# Here we focus on gubernatorial races and prepare this data for comparison with senatorial races
df_gub <- df_main3 %>%
filter(office == "gub") %>%
# Ensure that gub_year is set to year for gubernatorial races
mutate(gub_year = ifelse(office == "gub", year, gub_year)) %>%
select(state, gub_year, office, CARES, CARES_inc_state, CARES_inc, ARPA, ARPA_inc_state, ARPA_inc) %>%
# Reassign office to "senate" for the purpose of merging with the senatorial data
mutate(office = "senate")
# Merge the modified gubernatorial data back with the original dataset containing senatorial races
# The suffix argument adds "_sen" or "_gub" to distinguish between columns with the same names
df_main4 <- df_main3 %>%
filter(office == "senate") %>%
left_join(df_gub, by = c("state", "gub_year", "office"), suffix = c("_sen", "_gub")) %>%
# Compute various ratios and totals to compare gubernatorial and senatorial races
mutate(
total_CARES_ARPA_inc_gub = CARES_inc_gub + ARPA_inc_gub,
total_CARES_ARPA_inc_sen = CARES_inc_sen + ARPA_inc_sen,
# Create a new variable that sums the total stories from gubernatorial and senatorial races
total_stories = (total_CARES_ARPA_inc_gub + total_CARES_ARPA_inc_sen)/pop,
# Calculate ratios comparing gubernatorial to senatorial media coverage
ratio_total_CARES_ARPA_inc = total_CARES_ARPA_inc_gub / total_CARES_ARPA_inc_sen,
ratio_total_CARES_inc = CARES_inc_gub / CARES_inc_sen,
ratio_total_ARPA_inc = ARPA_inc_gub / ARPA_inc_sen,
total_CARES_ARPA_inc_state_gub = CARES_inc_state_gub + ARPA_inc_state_gub,
total_CARES_ARPA_inc_state_sen = CARES_inc_state_sen + ARPA_inc_state_sen,
ratio_total_CARES_ARPA_inc_state = total_CARES_ARPA_inc_state_gub / total_CARES_ARPA_inc_state_sen,
ratio_total_CARES_inc_state = CARES_inc_state_gub / CARES_inc_state_sen,
ratio_total_ARPA_inc_state = ARPA_inc_state_gub / ARPA_inc_state_sen,
total_CARES_ARPA_gub = CARES_gub + ARPA_gub,
total_CARES_ARPA_sen = CARES_sen + ARPA_sen,
ratio_total_CARES_ARPA = total_CARES_ARPA_gub / total_CARES_ARPA_sen,
ratio_total_CARES = CARES_gub / CARES_sen,
ratio_total_ARPA = ARPA_gub / ARPA_sen
)
# Add senate-specific and senate class non-specific lagged variables for each ratio
# Lagged variables capture the value of a variable from the previous time period
df_main5 <- df_main4 %>%
filter(year <= 2022 & year >= 2020) %>%
arrange(state, office, year) %>%  # Arrange the data by state, office, and year
group_by(state, office) %>%  # Group the data by state and office for calculating lagged variables
mutate(
lagged_ratio_total_CARES_ARPA_inc = lag(ratio_total_CARES_ARPA_inc),
lagged_ratio_total_CARES_inc = lag(ratio_total_CARES_inc),
lagged_ratio_total_ARPA_inc = lag(ratio_total_ARPA_inc),
lagged_ratio_total_CARES_ARPA_inc_state = lag(ratio_total_CARES_ARPA_inc_state),
lagged_ratio_total_CARES_inc_state = lag(ratio_total_CARES_inc_state),
lagged_ratio_total_ARPA_inc_state = lag(ratio_total_ARPA_inc_state),
lagged_ratio_total_CARES_ARPA = lag(ratio_total_CARES_ARPA),
lagged_ratio_total_CARES = lag(ratio_total_CARES),
lagged_ratio_total_ARPA = lag(ratio_total_ARPA)
) %>%
ungroup() %>%  # Remove grouping to avoid affecting subsequent operations
arrange(state, office, senate_class, year) %>%  # Arrange data by state, office, senate class, and year
group_by(state, office, senate_class) %>%  # Group by state, office, and senate class for further lagged variables
mutate(
lagged_ratio_total_CARES_ARPA_inc_class_spec = lag(ratio_total_CARES_ARPA_inc),
lagged_ratio_total_CARES_inc_class_spec = lag(ratio_total_CARES_inc),
lagged_ratio_total_ARPA_inc_class_spec = lag(ratio_total_ARPA_inc),
lagged_ratio_total_CARES_ARPA_inc_state_class_spec = lag(ratio_total_CARES_ARPA_inc_state),
lagged_ratio_total_CARES_inc_state_class_spec = lag(ratio_total_CARES_inc_state),
lagged_ratio_total_ARPA_inc_state_class_spec = lag(ratio_total_ARPA_inc_state),
lagged_ratio_total_CARES_ARPA_class_spec = lag(ratio_total_CARES_ARPA),
lagged_ratio_total_CARES_class_spec = lag(ratio_total_CARES),
lagged_ratio_total_ARPA_class_spec = lag(ratio_total_ARPA)
) %>%
ungroup()  # Remove grouping to finalize the data
# 2SLS ----------------------------------------------------------------------- #
# Define a function to run the IV (Instrumental Variable) model with different configurations
# The function takes two arguments: the outcome variable and whether to include a control variable.
run_iv_model <- function(outcome_var, include_control = TRUE) {
# Create the formula for the IV model based on whether the control variable is included.
# If 'include_control' is TRUE, the control variable 'norm_vote_top2_share_sd' is included in the formula.
if (include_control) {
formula <- as.formula(paste0(outcome_var, " ~ total_1k_muni_aid_per_resident + norm_vote_top2_share_sd |
reps_per_million + norm_vote_top2_share_sd"))
} else {
# If 'include_control' is FALSE, the control variable is omitted from the formula.
formula <- as.formula(paste0(outcome_var, " ~ total_1k_muni_aid_per_resident |
reps_per_million"))
}
# Run the IV model using the 'ivreg' function from the AER package.
# The formula and data are passed to the function.
iv_model <- ivreg(formula, data = df_main5)
# Calculate clustered standard errors at the state level.
clustered_se <- coeftest(iv_model, vcov = vcovCL(iv_model, cluster = ~ state))
# Return the IV model object.
return(iv_model)
}
# Define the outcome variables that will be tested in the IV models.
# These are the ratios calculated earlier: overall CARES + ARPA, CARES, and ARPA.
outcome_vars <- c("ratio_total_CARES_ARPA_inc", "ratio_total_CARES_inc", "ratio_total_ARPA_inc")
# Initialize an empty list to store the results of the IV models.
models <- list()
# Loop through each outcome variable and run the IV model twice:
# Once with the control variable and once without.
for (outcome in outcome_vars) {
cat("\nRunning IV model for outcome:", outcome, "with control:\n")
# Run the IV model with the control variable included and store the result in the models list.
models[[paste0(outcome, "_with_control")]] <- run_iv_model(outcome_var = outcome, include_control = TRUE)
cat("\nRunning IV model for outcome:", outcome, "without control:\n")
# Run the IV model without the control variable and store the result in the models list.
models[[paste0(outcome, "_without_control")]] <- run_iv_model(outcome_var = outcome, include_control = FALSE)
}
rm(list = ls())
master_directory = "C:/Users/Shantanu.Kamat/Dropbox/Elections/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(lmtest)
library(sandwich)
library(AER)
library(tidystringdist)
library(stargazer)
library(rio)
library(ropensecretsapi)
library(pdftools)
library(ivreg)
master_directory = "H:/DataSets4/Kamat/Elections - Public Choice Replication Package/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
master_directory = "H:/DataSets4/Kamat/Elections - Public Choice Replication Package/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(tidystringdist)
library(rio)
library(ropensecretsapi)
library(sandwich)
library(lmtest)
library(estimatr)
library(ivreg)
master_directory = "H:/DataSets4/Kamat/Elections - Public Choice Replication Package/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(tidystringdist)
library(rio)
library(ropensecretsapi)
library(sandwich)
library(lmtest)
library(estimatr)
library(ivreg)
library(modelsummary)
# load data frame
state_elections_panel_df9 <- read_xlsx(paste0(data_folder,"Processing/state_elections_panel_df_20240709.xlsx"))
# First_Stage_Senate_House_Separated.R
# Goal: Script that performs first stage regressions that disaggregate Senators
# per million and House members per million to assess how much Senate representation
# and House representation separately contribute to variation in federal aid per
# resident
# Author: Shantanu Kamat
# Last update: 2025-08-14
master_directory = "H:/DataSets4/Kamat/Elections - Public Choice Replication Package/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(tidystringdist)
library(rio)
library(ropensecretsapi)
library(sandwich)
library(lmtest)
library(estimatr)
library(ivreg)
library(modelsummary)
# load data frame
state_elections_panel_df9 <- read_xlsx(paste0(data_folder,"Processing/state_elections_panel_df_20240709.xlsx"))
# add variables for sens_per_million and house_per_million
state_elections_panel_df10 <- state_elections_panel_df9 %>%
mutate(sens_per_million = 1000000 * 2/pop,
house_per_million = 1000000 * house_seats/pop)
# run first stage regressions with sens_per_million and house_per_million (rather than reps_per_million)
# including control for normal vote
first_stage_sens_house_with_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share,
filter(state_elections_panel_df10, year >= 2020, year <= 2022))
summary(first_stage_sens_house_with_norm_vote)
coeftest(first_stage_sens_house_with_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)
# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_with_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share,
data = filter(state_elections_panel_df10, year >= 2020, year <= 2022),
clusters = state,
se_type = "stata")
summary(first_stage_sens_house_with_norm_vote1)
# NOT including control for normal vote
first_stage_sens_house_without_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million,
filter(state_elections_panel_df10, year >= 2020, year <= 2022))
summary(first_stage_sens_house_without_norm_vote)
coeftest(first_stage_sens_house_without_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)
# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_without_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million,
data = filter(state_elections_panel_df10, year >= 2020, year <= 2022),
clusters = state,
se_type = "stata")
summary(first_stage_sens_house_without_norm_vote1)
# first_stage_sens_house_without_norm_vote1 and first_stage_sens_house_with_norm_vote1
# are used in columns 1 and 2 respectively of Table A.3
gof_map_custom <- data.frame(raw = c("nobs", "r.squared"),
clean = c("Observations", "R-squared"),
fmt = c(0, 3))
modelsummary(list(first_stage_sens_house_without_norm_vote1, first_stage_sens_house_with_norm_vote1),
stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
coef_omit = "(Intercept)",
coef_map = c("sens_per_million" = "Sens per Million",
"house_per_million" = "House per Million",
"norm_vote_top2_share" = "Normal Vote"),
gof_map = gof_map_custom,
add_rows = data.frame(term = "F-Stat",
m = format(round(first_stage_sens_house_without_norm_vote1$fstatistic[1], 1), nsmall = 0),
m = format(round(first_stage_sens_house_with_norm_vote1$fstatistic[1], 1), nsmall = 0)),
title = "First Stage Regressions with Senators per Million and House Members per Million Disaggregated",
notes = "Note")
broom::tidy(first_stage_sens_house_without_norm_vote1)
modelsummary(list(first_stage_sens_house_without_norm_vote1, first_stage_sens_house_with_norm_vote1),
fmt = 3,
stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
coef_omit = "(Intercept)",
coef_map = c("sens_per_million" = "Sens per Million",
"house_per_million" = "House per Million",
"norm_vote_top2_share" = "Normal Vote"),
gof_map = gof_map_custom,
add_rows = data.frame(term = "F-Stat",
m = format(round(first_stage_sens_house_without_norm_vote1$fstatistic[1], 1), nsmall = 0),
m = format(round(first_stage_sens_house_with_norm_vote1$fstatistic[1], 1), nsmall = 0)),
title = "First Stage Regressions with Senators per Million and House Members per Million Disaggregated",
notes = "Note")
modelsummary(list(first_stage_sens_house_without_norm_vote1, first_stage_sens_house_with_norm_vote1),
stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
coef_omit = "(Intercept)",
coef_map = c("sens_per_million" = "Sens per Million",
"house_per_million" = "House per Million",
"norm_vote_top2_share" = "Normal Vote"),
gof_map = gof_map_custom,
add_rows = data.frame(term = "F-Stat",
m = format(round(first_stage_sens_house_without_norm_vote1$fstatistic[1], 1), nsmall = 0),
m = format(round(first_stage_sens_house_with_norm_vote1$fstatistic[1], 1), nsmall = 0)),
title = "First Stage Regressions with Senators per Million and House Members per Million Disaggregated",
notes = "Note")
?texreg
master_directory = "H:/DataSets4/Kamat/Elections - Public Choice Replication Package/"
data_folder = paste0(master_directory,"Data/")
do_folder = paste0(master_directory,"Do/")
results_folder = paste0(master_directory,"Results/")
charts_folder = paste0(master_directory,"Charts/")
# Load libraries
library(tidyverse)
library(janitor)
library(readxl)
library(cdlTools)
library(fuzzyjoin)
library(haven)
library(rvest)
library(Quandl)
library(lubridate)
library(openxlsx)
library(tidystringdist)
library(rio)
library(ropensecretsapi)
library(sandwich)
library(lmtest)
library(estimatr)
library(ivreg)
library(modelsummary)
# load data frame
state_elections_panel_df9 <- read_xlsx(paste0(data_folder,"Processing/state_elections_panel_df_20240709.xlsx"))
# add variables for sens_per_million and house_per_million
state_elections_panel_df10 <- state_elections_panel_df9 %>%
mutate(sens_per_million = 1000000 * 2/pop,
house_per_million = 1000000 * house_seats/pop)
# run first stage regressions with sens_per_million and house_per_million (rather than reps_per_million)
# including control for normal vote
first_stage_sens_house_with_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share,
filter(state_elections_panel_df10, year >= 2020, year <= 2022))
summary(first_stage_sens_house_with_norm_vote)
coeftest(first_stage_sens_house_with_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)
# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_with_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million + norm_vote_top2_share,
data = filter(state_elections_panel_df10, year >= 2020, year <= 2022),
clusters = state,
se_type = "stata")
summary(first_stage_sens_house_with_norm_vote1)
# NOT including control for normal vote
first_stage_sens_house_without_norm_vote <- lm(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million,
filter(state_elections_panel_df10, year >= 2020, year <= 2022))
summary(first_stage_sens_house_without_norm_vote)
coeftest(first_stage_sens_house_without_norm_vote, vcov = vcovCL, type = "HC1", cluster = ~state)
# first_stage_sens_house_with_norm_vote1 and first_stage_sens_house_without_norm_vote1
# are our preferred specifications for Senators per million and House members
# per million disaggregated.
first_stage_sens_house_without_norm_vote1 <- lm_robust(total_1k_muni_aid_per_resident ~ sens_per_million + house_per_million,
data = filter(state_elections_panel_df10, year >= 2020, year <= 2022),
clusters = state,
se_type = "stata")
summary(first_stage_sens_house_without_norm_vote1)
# first_stage_sens_house_without_norm_vote1 and first_stage_sens_house_with_norm_vote1
# are used in columns 1 and 2 respectively of Table A.3
gof_map_custom <- data.frame(raw = c("nobs", "r.squared"),
clean = c("Observations", "R-squared"),
fmt = c(0, 3))
modelsummary(list(first_stage_sens_house_without_norm_vote1, first_stage_sens_house_with_norm_vote1),
stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
coef_omit = "(Intercept)",
coef_map = c("sens_per_million" = "Sens per Million",
"house_per_million" = "House per Million",
"norm_vote_top2_share" = "Normal Vote"),
gof_map = gof_map_custom,
add_rows = data.frame(term = "F-Stat",
m = format(round(first_stage_sens_house_without_norm_vote1$fstatistic[1], 1), nsmall = 0),
m = format(round(first_stage_sens_house_with_norm_vote1$fstatistic[1], 1), nsmall = 0)),
title = "First Stage Regressions with Senators per Million and House Members per Million Disaggregated",
notes = "Note")
